# -*- coding: UTF-8 -*-
# @ Author : 游于艺
# @ QQ : 58012007
# @ File: zy_day35_游于艺.py
# @ Time: 2018/4/20 17:57
"""
1. 爬取以下站点中明星图片，各明星图片单独建文件夹存放。
起始URL地址：http://www.mm131.com/mingxing
提交作业代码 上传gitees
"""
import os
import time
import requests
from bs4 import BeautifulSoup
import lxml

# Browser-like request headers shared by every request in this script.
# The site uses Referer-based hotlink protection, so `get_all` rewrites
# headers["Referer"] per gallery before the image requests go out.
# NOTE(review): the hardcoded Cookie values are session-specific and have
# likely expired — confirm whether they are still required.
headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36",
    "Referer": "http://www.mm131.com/mingxing/",
    "Cookie": "UM_distinctid=162e27dfd0614-07d6d9acfd136d-3b604c04-144000-162e27dfd077e4; CNZZDATA3866066=cnzz_eid%3D291379987-1494676185-%26ntime%3D1494676185; bdshare_firstime=1524218330454; Hm_lvt_9a737a8572f89206db6e9c301695b55a=1524218330; Hm_lpvt_9a737a8572f89206db6e9c301695b55a=1524219170"
}

def get_all():
    """Crawl the star-photo index page and download each gallery.

    For every gallery linked from the index, creates ./pics/<star name>/
    via mkdir() and downloads at most 5 pages of images.  No return value;
    side effects are directory creation and file writes.
    """
    url = 'http://www.mm131.com/mingxing'
    # timeout keeps a dead server from hanging the whole crawl.
    list_html = requests.get(url, headers=headers, timeout=10)
    # Site serves GBK-family content; decode explicitly.
    list_html.encoding = 'gb18030'
    imgs = BeautifulSoup(list_html.text, 'lxml').find(
        'dl', class_='public-box').find_all(
        'a', class_="", attrs={"target": "_blank"})
    for img in imgs:
        href = img["href"]
        dirname = img.get_text()
        # Hotlink protection: download_img/get_img_url read the shared
        # `headers` dict, so the Referer must point at the current gallery.
        headers["Referer"] = href
        page_html = requests.get(href, headers=headers, timeout=10)
        page_html.encoding = 'gb18030'
        # The page counter text looks like "共N页"; strip the surrounding
        # characters and keep the number.
        max_page = int(BeautifulSoup(page_html.text, 'lxml').find(
            'span', class_='page-ch').get_text()[1:-1])
        max_page = min(max_page, 5)  # cap per-gallery downloads

        to_save_path = mkdir("./pics/", dirname)
        print("当前套图共%s页" % (max_page))
        if to_save_path:
            for page in range(1, max_page + 1):
                # Page 1 is the gallery root; later pages append "_N".
                if page == 1:
                    page_url = href
                else:
                    page_url = href.replace(".html", "_" + str(page) + ".html")
                img_url = get_img_url(page_url)
                time.sleep(1)  # be polite to the server
                download_img(img_url, to_save_path)


# 创建文件夹
# 创建文件夹
def mkdir(f_dir_name, path):
    """Create the sub-directory *path* (whitespace-stripped) under *f_dir_name*.

    Returns the joined directory path whether it was newly created or
    already existed, or None when creation fails (e.g. illegal characters
    in the directory name).
    """
    try:
        path = path.strip()
        child_dir_name = os.path.join(f_dir_name, path)
        if os.path.exists(child_dir_name):
            print("文件夹%s已存在!" % (path))
        else:
            print("新建文件夹:%s" % (path))
            os.makedirs(child_dir_name)
        return child_dir_name
    # Narrowed from a bare `except:` which also swallowed
    # KeyboardInterrupt/SystemExit; filesystem failures raise OSError.
    except OSError:
        return None
# 保存图片
# 保存图片
def download_img(img_url, dir_name):
    """Download *img_url* into *dir_name*, named after the URL's last path segment.

    Uses the module-level `headers` (Referer must already be set by the
    caller for the site's hotlink protection).
    """
    name = img_url.split('/')[-1]
    name = os.path.join(dir_name, name)
    img = requests.get(img_url, headers=headers, timeout=10)
    # 'wb' instead of 'ab': append mode duplicated the image bytes on
    # every re-run, corrupting the file; each download should overwrite.
    with open(name, 'wb') as f:
        f.write(img.content)

# 获取图片链接
# 获取图片链接
def get_img_url(page_url):
    """Return the src URL of the photo shown on a single gallery page."""
    response = requests.get(page_url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    pic_container = soup.find('div', class_='content-pic')
    return pic_container.find('img')['src']


if __name__ == "__main__":
    # Script entry point: crawl the index page and download every gallery.
    get_all()